package com.crawler.richtext.utils;

import java.math.BigDecimal;
import java.util.Locale;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.htmlparser.Attribute;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.nodes.TagNode;
import org.htmlparser.tags.CompositeTag;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.util.NodeIterator;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.crawler.richtext.htmlparser.HtmlParser;

import net.sf.json.JSONArray;
import net.sf.json.JSONObject;

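/**
 * Rich-text utilities: converts HTML rich text into the JSON node structure
 * used by mini-program rich-text rendering.
 * @author rubekid
 *
 * December 6, 2017, 11:21:57 AM
 */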
public class RichTextUtils {
	
	/** Class-wide logger; used to report parser failures. */
	private static final Logger logger = LoggerFactory.getLogger(RichTextUtils.class);
	
	/** Matches a whole document; group 1 captures only the inner HTML of the {@code <body>} element. */
	private static final Pattern BODY_WRAPPER =
			Pattern.compile("[\\s\\S]*<body[^>]*>([\\s\\S]+)</body>[\\s\\S]*");

	/** Matches an {@code <img>} tag with a double-quoted {@code src}; group 1 is the URL without its query string. */
	private static final Pattern IMG_TAG =
			Pattern.compile("<img[^>]+src=\"(.*?)(\\?.*?)*\"[^>]*?>");

	/**
	 * Replacement for {@link #IMG_TAG}: a bare {@code <img>} that keeps only the source URL,
	 * caps the width at 100% and appends a fixed {@code imageView2} query
	 * (NOTE(review): presumably an image-CDN resize/quality directive - confirm).
	 */
	private static final String IMG_REPLACEMENT =
			"<img style=\"max-width:100%;\" src=\"$1?imageView2/2/w/0/h/0/q/80\" />";

	/**
	 * Matches a pixel width declaration; group 1 is the numeric value.
	 * NOTE(review): the pattern has no left boundary, so it also matches the tail of
	 * {@code max-width: ...px} / {@code min-width: ...px}.
	 */
	private static final Pattern PX_WIDTH = Pattern.compile("width\\s*:\\s*([\\d\\.]+)px");

	/** Pixel widths strictly greater than this value are removed from the markup. */
	private static final double MAX_PX_WIDTH = 300;

	/**
	 * Converts rich text (HTML) into mini-program rich-text nodes.
	 * <p>
	 * Before parsing, the markup is normalised: an enclosing {@code <body>} wrapper is stripped,
	 * every {@code <img>} tag is rewritten to {@link #IMG_REPLACEMENT} (all other image attributes
	 * are dropped), and pixel widths above {@link #MAX_PX_WIDTH} are removed.
	 * <p>
	 * Each top-level tag becomes an object with {@code name}, {@code attrs} and {@code children};
	 * {@code article} and {@code section} are renamed to {@code div}. Any other top-level node
	 * (for example plain text between tags) becomes a {@code {"type":"text","text":...}} object,
	 * the same shape used for nested non-tag nodes.
	 *
	 * @param content the HTML to convert; may be {@code null} or empty
	 * @return the converted nodes; empty when {@code content} is {@code null} or empty, and
	 *         possibly partial when the parser fails (the failure is logged, not rethrown)
	 */
	public static JSONArray toMiniProgramNodes(String content){
		JSONArray nodes = new JSONArray();
		if(content == null || content.isEmpty()){
			return nodes;
		}

		// Strip the surrounding document. The guard is only a cheap pre-check: the regex itself
		// accepts attributes on <body> and leaves the content untouched when it does not match.
		if(content.contains("<body")){
			content = BODY_WRAPPER.matcher(content).replaceAll("$1");
		}
		content = IMG_TAG.matcher(content).replaceAll(IMG_REPLACEMENT);
		content = stripWideWidths(content);

		try{
			Parser parser = new HtmlParser(content);
			for (NodeIterator i = parser.elements(); i.hasMoreNodes(); ) {
				Node node = i.nextNode();
				JSONObject item = new JSONObject();
				if(node instanceof TagNode){
					TagNode tag = (TagNode) node;
					// Locale.ROOT keeps the mapping stable regardless of the JVM default locale.
					String tagName = tag.getTagName().toLowerCase(Locale.ROOT);
					if("article".equals(tagName) || "section".equals(tagName)) {
						tagName = "div";
					}
					item.put("name", tagName);
					item.put("attrs", getAttrs(tag));
					item.put("children", parseJSONArray(node));
				}
				else{
					// Top-level non-tag nodes used to hit an unconditional CompositeTag cast
					// and abort the whole conversion with a ClassCastException.
					item.put("type", "text");
					item.put("text", node.getText());
				}
				nodes.add(item);
			}
		}
		catch(ParserException ex){
			logger.error(ex.getMessage(), ex);
		}

		return nodes;
	}

	/**
	 * Removes every pixel width declaration whose value exceeds {@link #MAX_PX_WIDTH},
	 * in a single pass over {@code content}.
	 *
	 * @param content markup to clean
	 * @return {@code content} without the oversized width declarations
	 */
	private static String stripWideWidths(String content){
		Matcher matcher = PX_WIDTH.matcher(content);
		// StringBuffer: the StringBuilder overloads of appendReplacement/appendTail need Java 9+.
		StringBuffer cleaned = new StringBuffer(content.length());
		while(matcher.find()){
			// Default: put the matched text back unchanged.
			String replacement = Matcher.quoteReplacement(matcher.group());
			try{
				if(Double.parseDouble(matcher.group(1)) > MAX_PX_WIDTH){
					replacement = "";
				}
			}
			catch(NumberFormatException ex){
				// The character class also accepts things like "1.2.3" or "."; leave those as they are.
			}
			matcher.appendReplacement(cleaned, replacement);
		}
		matcher.appendTail(cleaned);
		return cleaned.toString();
	}
	
	/**
	 * Recursively converts the children of {@code parent} into mini-program nodes.
	 * <p>
	 * A child that is a {@link TagNode} becomes an object with {@code name}, {@code attrs} and
	 * {@code children} ({@code article} and {@code section} are renamed to {@code div}); every
	 * other child becomes a {@code {"type":"text","text":...}} object holding
	 * {@code node.getText()}.
	 * NOTE(review): "every other child" presumably includes HTML comments as well as plain
	 * text - confirm whether comments should be emitted.
	 *
	 * @param parent the node whose children are converted
	 * @return the converted children, or {@code null} when {@code parent} has no children
	 */
	private static JSONArray parseJSONArray(Node parent){
		NodeList children = parent.getChildren();
		if(children == null || children.size() == 0){
			return null;
		}

		JSONArray nodes = new JSONArray();
		for (int i = 0; i < children.size(); i++) {
			Node node = children.elementAt(i);
			JSONObject item = new JSONObject();
			if(node instanceof TagNode){
				TagNode tag = (TagNode) node;
				// Locale.ROOT: a locale-sensitive mapping would turn "IMG" into a dotless-i
				// name under a Turkish default locale.
				String tagName = tag.getTagName().toLowerCase(Locale.ROOT);
				if("article".equals(tagName) || "section".equals(tagName)) {
					tagName = "div";
				}
				item.put("name", tagName);
				item.put("attrs", getAttrs(tag));
				item.put("children", parseJSONArray(node));
			}
			else{
				item.put("type", "text");
				item.put("text", node.getText());
			}
			nodes.add(item);
		}
		// Every child contributes exactly one item, so the array is never empty here.
		return nodes;
	}
	
	/**
	 * Collects the attributes of {@code tag} as a JSON object keyed by lower-cased attribute name.
	 * <p>
	 * Entries without a name, or whose value is {@code null} or empty, are skipped.
	 * NOTE(review): presumably this is what filters out the non-attribute entries returned by
	 * {@code getAttributesEx()} - confirm against the parser documentation.
	 *
	 * @param tag the tag to read attributes from
	 * @return the attribute map, or {@code null} when no attribute has a non-empty value
	 */
	private static JSONObject getAttrs(TagNode tag){
		JSONObject attrs = new JSONObject();
		Vector<?> attributes = tag.getAttributesEx();
		if(attributes != null){
			for(Object element : attributes){
				Attribute attribute = (Attribute) element;
				String name = attribute.getName();
				String value = attribute.getValue();
				if(name == null || value == null || value.isEmpty()){
					continue;
				}
				// Locale.ROOT keeps the key independent of the JVM default locale.
				attrs.put(name.toLowerCase(Locale.ROOT), value);
			}
		}
		return attrs.size() > 0 ? attrs : null;
	}
	
	/**
	 * Matches a CSS pixel length such as {@code 10px}, {@code 774.188px} or {@code .5px};
	 * group 1 is the numeric part including any fraction.
	 */
	private static final Pattern PX_LENGTH = Pattern.compile("(\\d*\\.?\\d+)px");

	/** Factor applied to a px value to obtain the emitted rpx value. */
	private static final BigDecimal RPX_PER_PX = BigDecimal.valueOf(2);

	/**
	 * Converts every {@code px} length in a style string to {@code rpx} by doubling its value.
	 * <p>
	 * Decimal lengths are converted as a whole ({@code 774.188px} becomes {@code 1548.376rpx});
	 * text without any {@code px} length is returned unchanged.
	 *
	 * @param style the inline style text to convert; must not be {@code null}
	 * @return the style with all px lengths rewritten as rpx
	 */
	private static String px2Rpx(String style){
		Matcher matcher = PX_LENGTH.matcher(style);
		if(!matcher.find()){
			return style;
		}
		// StringBuffer: the StringBuilder overloads of appendReplacement/appendTail need Java 9+.
		StringBuffer converted = new StringBuffer(style.length() + 8);
		do {
			BigDecimal rpx = new BigDecimal(matcher.group(1)).multiply(RPX_PER_PX);
			// toPlainString never yields scientific notation, and contains no '$' or '\'
			// that appendReplacement would interpret.
			matcher.appendReplacement(converted, rpx.toPlainString() + "rpx");
		} while (matcher.find());
		matcher.appendTail(converted);
		return converted.toString();
	}
	
	/**
	 * Manual smoke test: converts a hard-coded sample HTML fragment with
	 * {@link #toMiniProgramNodes(String)} and prints the resulting JSON array
	 * to standard output.
	 *
	 * @param args command-line arguments; not used
	 */
	public static void main(String[] args) {
		String content = "<p style=\"margin: 0px auto; padding: 10px; line-height: 1.4; width: 774.188px; font-size: 48px; font-weight: bold; text-align: center;\">生活中点石成金的财务资讯<br/></p><p><span style=\"margin: 0px 0px 0px 10px; padding: 0px;\">作&nbsp;&nbsp;者:</span><span style=\"margin: 0px 0px 0px 20px; padding: 0px;\"><span style=\"\">何汕媛 著作</span></span></p><p><img src=\"https://img.alicdn.com/imgextra/i1/101450072/TB28iyVhwxlpuFjSszgXXcJdpXa-101450072.png\" alt=\"\" class=\"img-ks-lazyload\"/></p><p><span style=\"margin: 0px 0px 0px 10px; padding: 0px;\">定&nbsp;&nbsp;价:</span><span style=\"margin: 0px 0px 0px 20px; padding: 0px;\">28</span></p><p><img src=\"https://img.alicdn.com/imgextra/i1/101450072/TB28iyVhwxlpuFjSszgXXcJdpXa-101450072.png\" alt=\"\" class=\"img-ks-lazyload\"/></p><p><span style=\"margin: 0px 0px 0px 10px; padding: 0px;\">出 版 社:</span><span style=\"margin: 0px 0px 0px 20px; padding: 0px;\">西南财经大学出版社</span></p><p><img src=\"https://img.alicdn.com/imgextra/i1/101450072/TB28iyVhwxlpuFjSszgXXcJdpXa-101450072.png\" alt=\"\" class=\"img-ks-lazyload\"/></p><p><span style=\"margin: 0px 0px 0px 10px; padding: 0px;\">出版日期:</span><span style=\"margin: 0px 0px 0px 20px; padding: 0px;\">2013-01-01</span></p><p><img src=\"https://img.alicdn.com/imgextra/i1/101450072/TB28iyVhwxlpuFjSszgXXcJdpXa-101450072.png\" alt=\"\" class=\"img-ks-lazyload\"/></p><p><span style=\"margin: 0px 0px 0px 10px; padding: 0px;\">页&nbsp;&nbsp;数:</span><span style=\"margin: 0px 0px 0px 20px; padding: 0px;\">129</span></p><p><img src=\"https://img.alicdn.com/imgextra/i1/101450072/TB28iyVhwxlpuFjSszgXXcJdpXa-101450072.png\" alt=\"\" class=\"img-ks-lazyload\"/></p><p><span style=\"margin: 0px 0px 0px 10px; padding: 0px;\">装&nbsp;&nbsp;帧:</span><span style=\"margin: 0px 0px 0px 20px; padding: 0px;\">平装</span></p><p><img src=\"https://img.alicdn.com/imgextra/i1/101450072/TB28iyVhwxlpuFjSszgXXcJdpXa-101450072.png\" alt=\"\" class=\"img-ks-lazyload\"/></p><p><span style=\"margin: 0px 0px 0px 
10px; padding: 0px;\">ISBN:</span><span style=\"margin: 0px 0px 0px 20px; padding: 0px;\">9787550409002</span></p><p><img src=\"https://img.alicdn.com/imgextra/i1/101450072/TB28iyVhwxlpuFjSszgXXcJdpXa-101450072.png\" alt=\"\" class=\"img-ks-lazyload\"/></p><p><img class=\"desc_anchor img-ks-lazyload\" id=\"desc-module-2\" src=\"https://assets.alicdn.com/kissy/1.0.0/build/imglazyload/spaceball.gif\"/></p><p><img alt=\"目录\" src=\"https://img.alicdn.com/imgextra/i1/101450072/TB2dGIjcNRDOuFjSZFzXXcIipXa-101450072.png\" class=\"img-ks-lazyload\"/></p><p><span style=\"margin: 0px 18px 0px 0px; padding: 0px;\">●</span>1&nbsp;喝茶聊天中的价格1</p><p style=\"margin-top: 0px; margin-bottom: 0px; padding: 0px; line-height: 0;\"><br/></p><p><span style=\"margin: 0px 18px 0px 0px; padding: 0px;\">●</span>伤不起的猪肉1</p><p style=\"margin-top: 0px; margin-bottom: 0px; padding: 0px; line-height: 0;\"><br/></p><p><span style=\"margin: 0px 18px 0px 0px; padding: 0px;\">●</span>当消费者物价指数成为中国猪肉指数2</p><p style=\"margin-top: 0px; margin-bottom: 0px; padding: 0px; line-height: 0;\"><br/></p><p><span style=\"margin: 0px 18px 0px 0px; padding: 0px;\">●</span>逃不开的价格周期3</p><p style=\"margin-top: 0px; margin-bottom: 0px; padding: 0px; line-height: 0;\"><br/></p><p><span style=\"margin: 0px 18px 0px 0px; padding: 0px;\">●</span>越来越贵的衣服&nbsp;6</p><p style=\"margin-top: 0px; margin-bottom: 0px; padding: 0px; line-height: 0;\"><br/></p><p><span style=\"margin: 0px 18px 0px 0px; padding: 0px;\">●</span>服装成本面面观7</p><p style=\"margin-top: 0px; margin-bottom: 0px; padding: 0px; line-height: 0;\"><br/></p><p><span style=\"margin: 0px 18px 0px 0px; padding: 0px;\">●</span>打折怪圈何其多10</p><p style=\"margin-top: 0px; margin-bottom: 0px; padding: 0px; line-height: 0;\"><br/></p><p><span style=\"margin: 0px 18px 0px 0px; padding: 0px;\">●</span>抄号族的兴起11</p><p style=\"margin-top: 0px; margin-bottom: 0px; padding: 0px; line-height: 0;\"><br/></p><p><span style=\"margin: 0px 18px 0px 0px; padding: 
0px;\">●</span>房价：到底还会涨吗？&nbsp;&nbsp;12</p><p style=\"margin-top: 0px; margin-bottom: 0px; padding: 0px; line-height: 0;\"><br/></p><p><span style=\"margin: 0px 18px 0px 0px; padding: 0px;\">●</span>房子，中国人的痛12</p><p style=\"margin-top: 0px; margin-bottom: 0px; padding: 0px; line-height: 0;\"><br/></p><p><span style=\"margin: 0px 18px 0px 0px; padding: 0px;\">●</span>中国房地产业的美丽与哀愁13</p><p style=\"margin-top: 0px; margin-bottom: 0px; padding: 0px; line-height: 0;\"><br/></p><p><span style=\"margin: 0px 18px 0px 0px; padding: 0px;\">●</span>谁让房价这么高18</p><p style=\"margin-top: 0px; margin-bottom: 0px; padding: 0px; line-height: 0;\"><br/></p><p><span style=\"margin: 0px 18px 0px 0px; padding: 0px;\">●</span>宏观调控有用吗？&nbsp;26</p><p style=\"margin-top: 0px; margin-bottom: 0px; padding: 0px; line-height: 0;\"><br/></p><p><span style=\"margin: 0px 18px 0px 0px; padding: 0px;\">●</span>中国式房价的未来37</p><p style=\"margin-top: 0px; margin-bottom: 0px; padding: 0px; line-height: 0;\"><br/></p><p><span style=\"margin: 0px 18px 0px 0px; padding: 0px;\">●</span>2&nbsp;必须看懂宏观经济45</p><p style=\"margin-top: 0px; margin-bottom: 0px; padding: 0px; line-height: 0;\"><br/></p><p><span style=\"margin: 0px 18px 0px 0px; padding: 0px;\">●</span>……</p><p><img class=\"desc_anchor img-ks-lazyload\" id=\"desc-module-3\" src=\"https://assets.alicdn.com/kissy/1.0.0/build/imglazyload/spaceball.gif\"/></p><p><img src=\"https://img.alicdn.com/imgextra/i4/101450072/TB2uhKJhB0kpuFjSsppXXcGTXXa-101450072.png\" alt=\"内容虚线\" class=\"img-ks-lazyload\"/></p><p style=\"margin-top: 0px; margin-bottom: 0px; padding: 0px; line-height: 1.4; width: 774.188px; font-size: 40px; font-weight: bold; text-align: center;\">内容简介</p><p><img src=\"https://img.alicdn.com/imgextra/i2/101450072/TB2LvCmhxXkpuFjy0FiXXbUfFXa-101450072.png\" alt=\"\" class=\"img-ks-lazyload\"/></p><p style=\"margin-top: 1.12em; margin-bottom: 1.12em; padding: 0px; line-height: 
1.4;\">《生活中点石成金的财务资讯》由何汕媛所著，在**章中，作者深刻地描述并严谨地分析了日常生活中经常被提及的物价问题。肉价为什么会呈现出周期性的涨跌？商场里衣服打折的现象为何越来越普遍？高昂的房价背后究竟是谁在扮演推手？本书对于这些问题的解答可谓一针见血，切中肯綮，相信读者朋友会开卷获益。</p><p style=\"margin-top: 1.12em; margin-bottom: 1.12em; padding: 0px; line-height: 1.4;\">《生活中点石成金的财务资讯》的第二章以宏观调控政策的四大目标(经济增长、充分就业、物价稳定和平衡国际收支)为主线，再加上金融三大关键词(利率、流动性以及货币政策工具)，用简洁的笔力、生动的语言为读者们展现了宏观经济画卷中交错复杂的关系。通过这部分的阅读，你会对经济新闻中所提及的大部分内容拥有崭新的认识。</p><p style=\"margin-top: 1.12em; margin-bottom: 1.12em; padding: 0px; line-height: 1.4;\">本书的第三章则揭示了理财工具背后的故事。俗话有云：“你不理财，财不理你。”但近几年理财行为的火爆却与部分非理性的投资行为存在一定关联，普通人投资亏损的新闻亦屡见报端。银行理财产品......</p><p><img class=\"desc_anchor img-ks-lazyload\" id=\"desc-module-4\" src=\"https://assets.alicdn.com/kissy/1.0.0/build/imglazyload/spaceball.gif\"/></p><p><img src=\"https://img.alicdn.com/imgextra/i2/101450072/TB2pizlkohnpuFjSZFEXXX0PFXa-101450072.png\" alt=\"精彩内容\" class=\"img-ks-lazyload\"/></p><p style=\"margin-top: 1.12em; margin-bottom: 1.12em; padding: 0px; line-height: 1.4;\">&nbsp;&nbsp;&nbsp;&nbsp;在服装价格大肆上涨的同时，“打折”这个商场惯用促销手段也泛滥开来。以前打折促销一般集中于节假日期间。促销产品也多为库存积压或过季服装。进入2012年后，高昂的服装单价似乎已经成为名义上的价格，打折促销也不再仅限于过气商品，商场里以、价格销售的当季服装比比皆是。同样地，服装换季“降价战”也是一年比一年来得早。例如夏装换季，前几年是到立秋后才开始降价，后来是到立秋前，现在大热的三伏天里就敲响了锣鼓。新品的迅速降价，使得消费者学会了观望，在下季新款服装上市后不再急于购买，而是耐心等待价格优惠。即使是新品促销，由于消费者知道不久后会出现更大幅度的打折狂潮，因此其销售情况也就不甚理想。这使得本来就需面对高库存压力的服装企业雪上加霜，他们就不得不通过更大程度的打折促销来解决销售难题。多重原因影响下，出现不打折就卖不动的现象就不难理解了。</p><p style=\"margin-top: 1.12em; margin-bottom: 1.12em; padding: 0px; line-height: 1.4;\">......</p><p><br/></p>";
		// Run the full conversion on the sample and dump the resulting node array.
		JSONArray array = toMiniProgramNodes(content);
		System.out.println(array);
	}
}
